cbpal <- c("#CC79A7", # purple, primary
"#E69F00", # yellow, A
"#56B4E9", # blue, B
"#009E73", # green, C
"#7f7f7f") #gray, tcga
# set up themes for ggplotting
theme <- theme(text = element_text(size=16),
axis.text.y = element_text(size=18, face='bold'),
axis.text.x = element_text(size=18, face='bold'),
axis.title = element_text(size=20, face = "bold"),
legend.text= element_text(size=18),
legend.title= element_text(size=20, face='bold'),
strip.text=element_text(size=24, face='bold'))
# load functions
source('functions.r')ihc_results2 <- read_csv('../data/ihc.results.csv.gz') %>%
filter(variable=='percentage') %>%
dplyr::select(sample, cell_type, value)
ihc_res <- ihc_results2 %>%
filter(!cell_type %in% c('DC-LAMP+', 'CD3+FOXP3+')) # we only visualize markers for which all the samples could be quantified.
ihc_res$cell_type <- factor(as.character(ihc_res$cell_type), levels=rev(c('CD3+','CD20+',"CD68+","PDL1+")))
colors <- c("255 250 75", # CD3
"253 43 49", # CD20
"56 253 254", # CD68
"249 74 245" # PDL1
)
colors <- sapply(strsplit(colors, " "), function(x)
rgb(x[1], x[2], x[3], maxColorValue=255))
ihc_barplot2 <- ggplot(data=ihc_res) +
aes(x=cell_type, y=value*100, fill=cell_type) +
geom_bar(position='dodge', stat='identity') +
theme_classic() + theme +
theme(axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()) +
facet_wrap(~sample, ncol=1) +
scale_fill_manual(values = rev(colors)) +
labs(y='percent (%)', x='', title='') + coord_flip()
ihc_barplot2somatic_mutations <- read_tsv("../data/somatic_mutations_table.tsv.gz")
somatic_mutations %>%
filter(is_neoantigen == TRUE) %>%
group_by(is_edited) %>%
summarise(number_neoantigens=n())Select epitopes to plot. use ninemers because they have the best training data in NetMHC
vcf_master_epitope <- read_tsv('../data/topiary_predicted_ninemers.tsv.gz')
plotdf <- vcf_master_epitope[!is.na(vcf_master_epitope$sample) & !is.na(vcf_master_epitope$ic50), ]
plotdf$effect_type <- as.factor(plotdf$effect_type)
plotdf$allele <- as.factor(plotdf$allele)
somatic_epitopes <- plotdf
top_epitopes <- plotdf[plotdf$ic50 < 25 | plotdf$VAF_AD > 10 ,]
# select neoantigens to label in primary
top_epitopes <- rbind(top_epitopes[top_epitopes$sample != 'Primary', ],
top_epitopes[top_epitopes$sample == 'Primary' &
(top_epitopes$VAF_AD > 50 | top_epitopes$ic50 < 150),])
# in recurrent A
top_epitopes <- rbind(top_epitopes[top_epitopes$sample != 'Recurrent_A', ],
top_epitopes[top_epitopes$sample == 'Recurrent_A' &
(top_epitopes$VAF_AD > 25 | top_epitopes$ic50 < 150),])
# in recurrent B
top_epitopes <- rbind(top_epitopes[top_epitopes$sample != 'Recurrent_B', ],
top_epitopes[top_epitopes$sample == 'Recurrent_B' &
(top_epitopes$VAF_AD > 27 | top_epitopes$ic50 < 150),])
# in recurrent C
top_epitopes <- rbind(top_epitopes[top_epitopes$sample != 'Recurrent_C', ],
top_epitopes[top_epitopes$sample == 'Recurrent_C' &
(top_epitopes$VAF_AD > 11 | top_epitopes$ic50 < 150),])
# make sure there are no NA labels and label only those with > 10 reads suporting the variant allele
top_epitopes <- top_epitopes[!is.na(top_epitopes$sample) & !is.na(top_epitopes$ic50), ]
top_epitopes <- top_epitopes[top_epitopes$AD_T > 10, ]Plot a scatter of putatively immunogenic (ninemer) neoantigens
neoantigen_scatter <- ggplot(plotdf) +
aes(x=log(ic50), y=VAF_AD, color = allele, size=AD_T) +
geom_jitter(alpha=0.7) +
geom_label_repel(data = top_epitopes, aes(label = gene), size = 3, force=3, show.legend = FALSE ) +
geom_vline(xintercept=log(500), linetype="dashed") +
facet_wrap(. ~ sample, nrow=2, scales = 'free') +
labs(x="Binding Affinity: log(ic50)", y='VAF') +
theme_classic() +
theme +
guides(color = guide_legend(override.aes = list(size=5))) +
scale_color_brewer(palette = 'Spectral')
neoantigen_scattersource table for the patient’s alleles taken from http://www.allelefrequencies.net/
allele_pop_freq <- read_csv('../data/pt_allele_global_pop_frequencies.csv.gz')
colnames(allele_pop_freq) <- c('allele', 'population', 'percent_indiv_with_allele', 'allele_frequency', 'sample_size', 'location')
allele_pop_freq <- allele_pop_freq %>% group_by(allele) %>% summarise(mean_popAF=mean(allele_frequency))
# Summary of mean global population allele frequency
allele_pop_freq# The most common allele is therefore...
allele_pop_freq[allele_pop_freq$mean_popAF == max(allele_pop_freq$mean_popAF), ]prepare TMB df to show what fraction of the TMB comes from immunogenic neoantigens
tmb_df <- somatic_mutations %>% group_by(sample, is_neoantigen) %>%
summarise(count=n(),tmb=count/54) %>%
ungroup() %>%
mutate(sample = fct_relevel(sample, c('Primary', "Recurrent_A", 'Recurrent_B', 'Recurrent_C')))
tmb2 <- tmb_df %>% group_by(sample) %>% summarise(tmb=sum(tmb)) %>% inner_join(tmb_df[tmb_df$is_neoantigen==TRUE,c('count', "is_neoantigen", 'sample')], by=c('sample'))
tmb_df %>% filter(is_neoantigen)Select which neoantigens to show.
top_genes <- vcf_master_epitope %>%
filter(ic50 < 500) %>% # ic50 binding cutoff
filter(AD_T > 5) %>% # at least 5 reads
filter(length == 9) %>% # make sure you have 9mers
group_by(sample) %>%
top_n(50, wt=-ic50) # get top 50 ic50's per sample
top_genes <- unique(c(levels(as.factor(top_genes$gene)),
'PTEN', 'TP53', 'QKI', 'MAPK2', 'MAPK1', 'MAPK4', 'VEPH1',
'TGFBI',"SLC9A4", 'EGFR', 'CASP5', "MAP7", "MAP3K1", "MAP3K7",
"PTPN5", "PTPN11", "BRAF", "PTPRH", "AKT1", "AKT2"))
top_genes <- top_genes[!(top_genes %in%
c('TTC39A', 'FLNC', 'USP48', 'AFM', 'LRRC32', 'GSR', 'CHRD',
'QKI', 'PTEN', 'EGFR', 'TP53', 'DAGLA', 'CENPI', 'C12orf45',
'ATP11A', 'ARHGEF28'))]
tmp <- vcf_master_epitope %>% mutate(ID=paste(sample, gene, ic50, sep='_')) %>% dplyr::select(ID, allele, length, combo, is_edited)
ninemer_df <- vcf_master_epitope %>%
filter(gene %in% top_genes) %>%
group_by(sample, gene) %>% summarise(min_ic50=min(ic50)) %>%
mutate(ID=paste(sample, gene, min_ic50, sep='_')) %>%
left_join(tmp, by=c('ID')) %>%
dplyr::select(-ID) %>%
filter(min_ic50 < 500)
ninemer_df$sample <- fct_relevel(ninemer_df$sample, c('Primary', "Recurrent_A", 'Recurrent_B', 'Recurrent_C'))
ninemer_df$gene <- as.factor(ninemer_df$gene)
rm(tmp, top_genes)
# order gene factor by number of samples the gene shows up in
tmp <- data.frame(samples_per_gene=summary(as.factor(ninemer_df$gene)),
gene=names(summary(as.factor(ninemer_df$gene))))
ninemer_df <- left_join(ninemer_df, tmp, by=c('gene'))
ninemer_df$gene <- as.factor(ninemer_df$gene)
ninemer_df$gene <- fct_reorder(ninemer_df$gene, .x=ninemer_df$samples_per_gene)
rm(tmp)
ninemer_df$is_edited <- as.character(ninemer_df$is_edited)
ninemer_df$allele <- as.factor(ninemer_df$allele)tmb_barplot <- ggplot(data=tmb_df) +
aes(x=sample,y=tmb, fill=is_neoantigen) +
geom_bar(stat='identity') +
geom_text(data=tmb2, aes(label=paste('N =',count), fill=NULL),
size=8.5,nudge_y = 0.5, show.legend=FALSE) +
labs(x='', y='TMB (Muts/MB)', fill='is neoantigen?') +
theme_classic() +
theme +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank()) +
scale_fill_manual(values = c('black', "grey40"), labels = c('No', 'Yes'))
primary_loss <- ggplot(data=ninemer_df[ninemer_df$is_edited == 'primary',]) +
aes(x=sample, y=gene, fill=allele, label=min_ic50) +
geom_tile(color='white', size=1) +
geom_text(size=7) +
scale_x_discrete(drop=FALSE) +
labs(x='',y='primary', fill='HLA Allele') +
theme_classic() +
theme +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank()) +
scale_fill_brewer(palette = "BrBG", drop=FALSE)
recc_gain <- ggplot(data=ninemer_df[ninemer_df$is_edited == 'recurrent',]) +
aes(x=sample, y=gene, fill=allele, label=min_ic50) +
geom_tile(color='white', size=1) +
scale_x_discrete(drop=FALSE) +
geom_text(size=7) +
labs(x='',y='recurrent', fill='HLA Allele') +
theme_classic() + rotate_x_text(angle=45) +
theme +
scale_fill_brewer(palette = "BrBG", drop=FALSE)
immunoprint_ninemer_chart <- ggarrange(tmb_barplot, primary_loss, recc_gain,
nrow=3, align = 'v', heights = c(1.2,1.7,7.5))
immunoprint_ninemer_chartRead in xCELL results.
xcell_results <- read_tsv('../data/xCell_filtered_cpm_for_xcell_xCell_0704072519.results.txt.gz',
skip=1, col_names=c("cell_type","Recurrent_A", "Recurrent_B",
"Recurrent_C", "Primary")) %>%
tidyr::gather(sample, percent, c("Recurrent_A", "Recurrent_B",
"Recurrent_C", "Primary"), convert=TRUE, factor_key=TRUE) %>%
mutate(deconv_method='xCell',
cell_type = factor(cell_type, levels = rownames(
read.table('../data/xCell_filtered_cpm_for_xcell_xCell_0704072519.results.txt.gz',
sep='\t',stringsAsFactors=FALSE,row.names=1))),
sample=as.factor(sample))
xcell_resultsShow all xcell results, with the exception of stromal and other non-immune cell-types.
# filter xcell results to show a subset of celltypes
cell_types <- c(# xCELL's aggregate scores
"MicroenvironmentScore", "StromaScore", "ImmuneScore",
# T-lymphocytes
"Th1 cells","Th2 cells","Tregs", "CD4+ memory T-cells",
"CD4+ naive T-cells","CD4+ T-cells","CD4+ Tcm","CD4+ Tem",
"CD8+ naive T-cells","CD8+ T-cells","CD8+ Tcm","CD8+ Tem",
# B-lymphocytes
"pro B-cells","Memory B-cells","naive B-cells",
"Class-switched memory B-cells","B-cells",
# innate popupations
"Macrophages","Macrophages M1","Macrophages M2",
"Monocytes","Neutrophils","Mast cells","Basophils",
"NK cells","DC", "pDC")
xcell_results <- xcell_results %>%
filter(cell_type %in% cell_types) %>%
mutate(cell_type = factor(as.character(cell_type), levels=rev(cell_types))) %>%
# remove celltypes with estimated negative percentages
filter(percent >= 0)
xcell_showall <- ggplot(data=xcell_results) +
aes(x=cell_type, y=percent*100, color=sample, group=sample) +
geom_point(alpha=0.75, size=8) + geom_line(alpha=0.6) +
theme_classic() + theme + scale_color_manual(values=cbpal) +
rotate_x_text(angle=45) + scale_x_discrete(drop=FALSE) +
labs(y='percent (%)', x='', title='xCELL quantification') + coord_flip()
xcell_showallFilter xCELL results to showcase a subset of celltypes.
xcell_filt <- xcell_results %>%
filter(cell_type %in%
c("naive B-cells","Memory B-cells","Tregs","Th1 cells",
"Th2 cells","CD8+ T-cells","CD4+ naive T-cells","CD4+ memory T-cells",
"Macrophages M1","Macrophages M2","Monocytes")) %>%
mutate(cell_type=factor(as.character(cell_type),
levels = c("naive B-cells","Memory B-cells", "Tregs","Th1 cells",
"Th2 cells", "CD8+ T-cells","CD4+ naive T-cells",
"CD4+ memory T-cells", "Macrophages M1",
"Macrophages M2","Monocytes")))
xcell_showfilt <- ggplot(data=xcell_filt) +
aes(x=cell_type, y=percent*100, color=sample, group=sample) +
geom_point(alpha=0.75, size=8) + geom_line(alpha=0.6) +
theme_classic() + theme + scale_color_manual(values=cbpal) +
rotate_x_text(angle=45) + scale_x_discrete(drop=FALSE) +
labs(y='percent (%)', x='') + coord_flip()
xcell_showfiltcell_quant <- ggarrange(ihc_barplot2, xcell_showfilt, ncol=2, widths=c(1,2))
grid.arrange(cell_quant, immunoprint_ninemer_chart, ncol=2, widths=c(1.8,1))png('../figures/figure3_rplots.png', width=24, height=22, units='in', res=300)
grid.arrange(cell_quant, immunoprint_ninemer_chart, ncol=2, widths=c(1.8,1))
dev.off()## quartz_off_screen
## 2
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Catalina 10.15.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] lattice_0.20-38 gridExtra_2.3 ggpubr_0.2.3
## [4] magrittr_1.5 RColorBrewer_1.1-2 ggrepel_0.8.1
## [7] forcats_0.4.0 stringr_1.4.0 dplyr_0.8.3
## [10] purrr_0.3.3 readr_1.3.1 tidyr_1.0.0
## [13] tibble_2.1.3 ggplot2_3.2.1 tidyverse_1.2.1
##
## loaded via a namespace (and not attached):
## [1] tidyselect_0.2.5 xfun_0.10 haven_2.1.1 colorspace_1.4-1
## [5] vctrs_0.2.0 generics_0.0.2 htmltools_0.4.0 yaml_2.2.0
## [9] rlang_0.4.1 pillar_1.4.2 glue_1.3.1 withr_2.1.2
## [13] modelr_0.1.5 readxl_1.3.1 lifecycle_0.1.0 munsell_0.5.0
## [17] ggsignif_0.6.0 gtable_0.3.0 cellranger_1.1.0 rvest_0.3.4
## [21] evaluate_0.14 labeling_0.3 knitr_1.25 broom_0.5.2
## [25] Rcpp_1.0.2 scales_1.0.0 backports_1.1.5 jsonlite_1.6
## [29] hms_0.5.2 digest_0.6.22 stringi_1.4.3 cowplot_1.0.0
## [33] cli_1.1.0 tools_3.6.1 lazyeval_0.2.2 crayon_1.3.4
## [37] pkgconfig_2.0.3 zeallot_0.1.0 ellipsis_0.3.0 xml2_1.2.2
## [41] lubridate_1.7.4 assertthat_0.2.1 rmarkdown_1.16 httr_1.4.1
## [45] rstudioapi_0.10 R6_2.4.0 nlme_3.1-141 compiler_3.6.1